articles$NUM_ARTICLES <- 1
tz <- sort(unique(county.years$YEAR))
t <- 1
# Direct match
cy.direct.list <- lapply(1:length(tz),function(t){
data.sub <- county.years[county.years$YEAR==tz[t],]
data.sub2 <- data.frame(CL_GEOID=data.sub[,"CL_GEOID"])
news.sub <- read.csv(paste0("Newspapers2/CountyYear/",tz[t],"_CL_AAM.csv"))
news.sub <- news.sub[,c("sid","GEOID","YEAR","AverageProjectedCirculation")]
art.sub <- articles[articles$YEAR==tz[t],]
art.sub2 <- merge(art.sub,news.sub,by.x=c("SID","YEAR"),by.y=c("sid","YEAR"),all.x=F,all.y=T)
art.sub2 <- art.sub2[,!names(art.sub2)%in%names(datez)]
disag <- sort(unique(art.sub2$GEOID))
geoid.list <- lapply(1:length(disag),function(d){print(paste(tz[t],d,"of",length(disag)))
art.subz <- art.sub2[art.sub2$GEOID%in%disag[d],]
w <- art.subz$AverageProjectedCirculation/sum(as.numeric(art.subz$AverageProjectedCirculation),na.rm=T)
art.subz2 <- art.subz[,!names(art.subz)%in%c("SID","sid","GEOID","YEAR","AverageProjectedCirculation","NUM_ARTICLES")]
for(j in 1:ncol(art.subz2)){art.subz2[is.na(art.subz2[,j]),j] <- 0}
head(art.subz2)
art.subz3 <- apply(art.subz2,2,function(x){weighted.mean(x,w=w,na.rm=T)})
w2 <- w[art.subz$NUM_ARTICLES==1]
art.subz4 <- apply(art.subz2[art.subz$NUM_ARTICLES==1,],2,function(x){weighted.mean(x,w=w2,na.rm=T)})
names(art.subz4) <- paste0(names(art.subz4),"_COV")
out <- cbind(data.frame(CL_GEOID=disag[d],YEAR=tz[t],NUM_ARTICLES= sum(art.subz$NUM_ARTICLES,na.rm=T),NUM_PAPERS=nrow(art.subz),NUM_PAPERS2=length(unique(art.subz$SID))),NUM_PAPERS_COV=length(unique(art.subz[which(art.subz$NUM_ARTICLES==1),"SID"])),t(art.subz3),t(art.subz4))
out
})
c.direct <- do.call(rbind,geoid.list)
save(c.direct,file=paste0("MergeData/Output_AAM/temp_CL_NaiveBayes_AAM_",tz[t],".RData"))
c.direct
})
cy.direct <- do.call(rbind,cy.direct.list)
for(j in 1:length(rcvarz)){cy.direct[is.na(cy.direct[,rcvarz[j]]),rcvarz[j]] <- 0}
save(cy.direct,file=paste0("MergeData/Output_AAM/temp_CL_NaiveBayes_AAM.RData"))
dim(cy.direct)
head(cy.direct)
summary(cy.direct)
# Lags
varz <- names(cy.direct)[!names(cy.direct)%in%c("CL_GEOID","YEAR")]
cy.direct <- streg.sort(data=cy.direct, unitvar="CL_GEOID", timevar="YEAR")
cy.direct.lagz <- cy.direct
for(j in 1:length(varz)){
cy.direct.lagz[,varz[j]] <- streg.tlag(cy.direct, unitvar="CL_GEOID", timevar="YEAR", lagvar=varz[j])
}
names(cy.direct.lagz)[names(cy.direct.lagz)%in%varz] <- paste0(names(cy.direct.lagz)[names(cy.direct.lagz)%in%varz],"_t")
cy.direct$MERGEKEY <- paste(cy.direct$CL_GEOID,cy.direct$YEAR)
cy.direct.lagz$MERGEKEY <- paste(cy.direct.lagz$CL_GEOID,cy.direct.lagz$YEAR)
ixz <- Reduce(intersect, list(cy.direct$MERGEKEY,cy.direct.lagz$MERGEKEY))
cy.direct <- cy.direct[cy.direct$MERGEKEY%in%ixz,]
cy.direct.lagz <- cy.direct.lagz[cy.direct.lagz$MERGEKEY%in%ixz,]
cy.direct <- cy.direct[!duplicated(cy.direct$MERGEKEY),]
cy.direct.lagz <- cy.direct.lagz[!duplicated(cy.direct.lagz$MERGEKEY),]
cy.direct$MERGEKEY <- NULL
cy.direct.lagz$MERGEKEY <- NULL
cy.direct.lagz <- merge(cy.direct,cy.direct.lagz,by=c("CL_GEOID","YEAR"),all.x=T,all.y=T)
cy.data <- merge(cy.direct.lagz,county.years,by=c("CL_GEOID","YEAR"),all.x=T,all.y=F)
save(cy.data,file=paste0("DATA/DATA",tst,"/DATA_CountyYears_ALL_NaiveBayes_AAM.RData"))
#write.csv(cy.data,file=paste0("DATA/DATA",tst,"/DATA_CountyYears_ALL_NaiveBayes_AAM.csv"),row.names=F)
names(cy.data) <- gsub("_COV_t","_C_t",names(cy.data))
write.dta(cy.data,file=paste0("DATA/DATA",tst,"/DATA_CountyYears_ALL_NaiveBayes_AAM.dta"),convert.factors="labels",convert.dates=F)
# Number of chunks
m <- 32
load(paste0("MergeData/Output_NB",tst,"/","mData_KINDRED","_m_",m,".RData"))
ls()
# # Combined scores
# varzf <- c("VICTIM_PHYS_STATE","VICTIM_LOCATION","CREDIBILITY_INCONSISTENCIES",
#            "CONSENT_NO_RESIST","CONSENT_PAST_ROMANCE","CONSENT_INTERCOURSE",
#            "OTHER_MONSTER","OTHER_GRAPHIC_VIOLENCE","OTHER_GRAPHIC_SEX","PRIVACYVIC_NAME",
#            "PRIVACYVIC_AGE","PRIVACYVIC_PROFESSION","PRIVACYACC_NAME","PRIVACYACC_AGE")
# mdata$RAPECULTURE_14 <- apply(mdata[,varzf],1,function(x){sum(x,na.rm=T)})
# varzf <- c("VICTIM_LOCATION","CONSENT_INTERCOURSE","OTHER_GRAPHIC_VIOLENCE",
#            "OTHER_GRAPHIC_SEX","PRIVACYVIC_AGE","PRIVACYACC_NAME","PRIVACYACC_AGE",
#            "VICTIM_PHYS_STATE","CONSENT_PAST_ROMANCE")
# mdata$RAPECULTURE_9 <- apply(mdata[,varzf],1,function(x){sum(x,na.rm=T)})
# varzf <- c("VICTIM_ANY","EMPATHY_ANY","CREDIBILITY_ANY","CONSENT_ANY",
#            "PRIVACYVIC_ANY","PRIVACYACC_ANY")
# mdata$RAPECULTURE_6 <- apply(mdata[,varzf],1,function(x){sum(x,na.rm=T)})
varzf <- c("VICTIM_ANY","EMPATHY_ANY","CREDIBILITY_ANY","CONSENT_ANY")
mdata$RAPECULTURE_NONE <- apply(mdata[,varzf],1,function(x){1*(sum(x,na.rm=T)==0)})
mdata$RAPECULTURE_ANY <- apply(mdata[,varzf],1,function(x){1*(sum(x,na.rm=T)>0)})
mdata$RAPECULTURE_ALL <- apply(mdata[,varzf],1,function(x){1*(sum(x,na.rm=T)==4)})
rcvarz <- c(names(mdata)[-1])
# Merge with paper.match
articles <- merge(paper.match,mdata,by.x="AID",by.y="DOCID",all.x=F,all.y=T)
head(articles)[,1:10]
# Merge with newspaper/county/state covariates
articles <- articles[!is.na(articles$SID),]
articles <- articles[!is.na(articles$YEAR),]
articles$NUM_ARTICLES <- 1
tz <- sort(unique(county.years$YEAR))
t <- 1
# Direct match
cy.direct.list <- lapply(1:length(tz),function(t){
data.sub <- county.years[county.years$YEAR==tz[t],]
data.sub2 <- data.frame(CL_GEOID=data.sub[,"CL_GEOID"])
news.sub <- read.csv(paste0("Newspapers2/CountyYear/",tz[t],"_CL_AAM.csv"))
news.sub <- news.sub[,c("sid","GEOID","YEAR","AverageProjectedCirculation")]
art.sub <- articles[articles$YEAR==tz[t],]
art.sub2 <- merge(art.sub,news.sub,by.x=c("SID","YEAR"),by.y=c("sid","YEAR"),all.x=F,all.y=T)
art.sub2 <- art.sub2[,!names(art.sub2)%in%names(datez)]
disag <- sort(unique(art.sub2$GEOID))
geoid.list <- lapply(1:length(disag),function(d){print(paste(tz[t],d,"of",length(disag)))
art.subz <- art.sub2[art.sub2$GEOID%in%disag[d],]
w <- art.subz$AverageProjectedCirculation/sum(as.numeric(art.subz$AverageProjectedCirculation),na.rm=T)
art.subz2 <- art.subz[,!names(art.subz)%in%c("SID","sid","GEOID","YEAR","AverageProjectedCirculation","NUM_ARTICLES")]
for(j in 1:ncol(art.subz2)){art.subz2[is.na(art.subz2[,j]),j] <- 0}
head(art.subz2)
art.subz3 <- apply(art.subz2,2,function(x){weighted.mean(x,w=w,na.rm=T)})
w2 <- w[art.subz$NUM_ARTICLES==1]
art.subz4 <- apply(art.subz2[art.subz$NUM_ARTICLES==1,],2,function(x){weighted.mean(x,w=w2,na.rm=T)})
names(art.subz4) <- paste0(names(art.subz4),"_COV")
out <- cbind(data.frame(CL_GEOID=disag[d],YEAR=tz[t],NUM_ARTICLES= sum(art.subz$NUM_ARTICLES,na.rm=T),NUM_PAPERS=nrow(art.subz),NUM_PAPERS2=length(unique(art.subz$SID))),NUM_PAPERS_COV=length(unique(art.subz[which(art.subz$NUM_ARTICLES==1),"SID"])),t(art.subz3),t(art.subz4))
out
})
c.direct <- do.call(rbind,geoid.list)
save(c.direct,file=paste0("MergeData/Output_AAM/temp_CL_KINDRED_NaiveBayes_AAM_",tz[t],".RData"))
c.direct
})
cy.direct <- do.call(rbind,cy.direct.list)
for(j in 1:length(rcvarz)){cy.direct[is.na(cy.direct[,rcvarz[j]]),rcvarz[j]] <- 0}
save(cy.direct,file=paste0("MergeData/Output_AAM/temp_CL_KINDRED_NaiveBayes_AAM.RData"))
dim(cy.direct)
head(cy.direct)
# Lags
varz <- names(cy.direct)[!names(cy.direct)%in%c("CL_GEOID","YEAR")]
cy.direct <- streg.sort(data=cy.direct, unitvar="CL_GEOID", timevar="YEAR")
cy.direct.lagz <- cy.direct
for(j in 1:length(varz)){
cy.direct.lagz[,varz[j]] <- streg.tlag(cy.direct, unitvar="CL_GEOID", timevar="YEAR", lagvar=varz[j])
}
names(cy.direct.lagz)[names(cy.direct.lagz)%in%varz] <- paste0(names(cy.direct.lagz)[names(cy.direct.lagz)%in%varz],"_t")
cy.direct$MERGEKEY <- paste(cy.direct$CL_GEOID,cy.direct$YEAR)
cy.direct.lagz$MERGEKEY <- paste(cy.direct.lagz$CL_GEOID,cy.direct.lagz$YEAR)
ixz <- Reduce(intersect, list(cy.direct$MERGEKEY,cy.direct.lagz$MERGEKEY))
cy.direct <- cy.direct[cy.direct$MERGEKEY%in%ixz,]
cy.direct.lagz <- cy.direct.lagz[cy.direct.lagz$MERGEKEY%in%ixz,]
cy.direct <- cy.direct[!duplicated(cy.direct$MERGEKEY),]
cy.direct.lagz <- cy.direct.lagz[!duplicated(cy.direct.lagz$MERGEKEY),]
cy.direct$MERGEKEY <- NULL
cy.direct.lagz$MERGEKEY <- NULL
cy.direct.lagz <- merge(cy.direct,cy.direct.lagz,by=c("CL_GEOID","YEAR"),all.x=T,all.y=T)
cy.data <- merge(cy.direct.lagz,county.years,by=c("CL_GEOID","YEAR"),all.x=T,all.y=F)
# Save
save(cy.data,file=paste0("DATA/DATA",tst,"/DATA_CountyYears_KINDRED_NaiveBayes_AAM.RData"))
#write.csv(cy.data,file=paste0("DATA/DATA",tst,"/DATA_CountyYears_KINDRED_NaiveBayes_AAM.csv"),row.names=F)
names(cy.data) <- gsub("_COV_t","_C_t",names(cy.data))
write.dta(cy.data,file=paste0("DATA/DATA",tst,"/DATA_CountyYears_KINDRED_NaiveBayes_AAM.dta"),convert.factors="labels",convert.dates=F)
rm(list=ls()); tst <- c("_NOERROR","_ONTOPIC")[1]
# install.packages("zoo")
library(zoo)
# install.packages("stringr")
library(stringr)
library(xtable)
# install.packages("gdata")
library(gdata)
library(maptools)
#install.packages('foreign')
# install.packages('RCurl')
library(foreign)
#install.packages("rgeos")
library(rgeos)
library(spdep)
setwd("F:/Dropbox (Personal)/BCZ_All/BCZ/Data")
setwd("~/Dropbox (Personal)/BCZ_All/BCZ/Data")
setwd("~/Dropbox/BCZ_All/BCZ/Data")
rm(list=ls()); tst <- c("_NOERROR","_ONTOPIC")[1]
load("Newspapers/NewspaperWorkspace_v2.RData")
#load("MergeData/Output2/PaperStateYears_MI.RData")
load("MergeData/Output2/PaperCountyYears_MI.RData")
head(paperz.cy)
load("Corpus/BCZ_Corpus_US_v2.RData");
load("MergeData/Output2/CountyYears_Interpolated_MI.RData")
load("MergeData/Geo/CL_Geo.RData")
datez <- corpus_US[,c("AID","YEAR","MONTH","MONTH_NUM","DAY","YRMO","DATE")]
rm(corpus_US)
paper.match <- paper.match[,c("AID","SID")]
paper.match <- merge(paper.match,datez,by="AID",all.x=T,all.y=F)
source("MergeData/functions.R")
dir(paste0("DATA/DATA",tst))
dir(paste0("DATA/DATA",tst))[grep("AAM",dir(paste0("DATA/DATA",tst)))]
dir(paste0("DATA/DATA",tst))[grep("AAM&RData",dir(paste0("DATA/DATA",tst)))]
dir(paste0("DATA/DATA",tst))[grep("AAM&&RData",dir(paste0("DATA/DATA",tst)))]
dir(paste0("DATA/DATA",tst))[grep("AAM\\&RData",dir(paste0("DATA/DATA",tst)))]
dir(paste0("DATA/DATA",tst))[grep("AAM\&RData",dir(paste0("DATA/DATA",tst)))]
dir(paste0("DATA/DATA",tst))[grep("AAM|RData",dir(paste0("DATA/DATA",tst)))]
dir(paste0("DATA/DATA",tst))[grep("AAM",dir(paste0("DATA/DATA",tst)))]
filz <- c("DATA_CountyYears_ALL_NaiveBayes_AAM.RData","DATA_CountyYears_ALL_SVM_AAM.RData","DATA_CountyYears_AUCMAT_SVM_AAM.RData","DATA_CountyYears_KINDRED_NaiveBayes_AAM.RData","DATA_CountyYears_KINDRED_SVM_AAM.RData")
gsub("_AAM","",filz)
gsub("_AAM","",filz)%in%dir(paste0("DATA/DATA",tst))
k <- 1
load(paste0("DATA/DATA",tst,"/",filz[k]))
head(cy.data)
load(paste0("DATA/DATA",tst,"/",gsub("_AAM","",filz)[k]))
head(cy.data)
dim(cy.data)
load(paste0("DATA/DATA",tst,"/",filz[k]))
dim(cy.data)
rm(cy.data)
load(paste0("DATA/DATA",tst,"/",gsub("_AAM","",filz)[k]))
cy1 <- cy.data; rm(cy.data)
load(paste0("DATA/DATA",tst,"/",filz[k]))
names(cy1)[!names(cy1)%in%names(cy.data)]
varz.mis <- names(cy1)[!names(cy1)%in%names(cy.data)]
head(cy.data)
cy.data$TEMP <- paste0(cy.data$CL_GEOID,"_",cy.data$YEAR)
cy1$TEMP <- paste0(cy1$CL_GEOID,"_",cy1$YEAR)
cy.data <- merge(cy.data,cy1,by="TEMP",all.x=T,all.y=F)
cy1 <- cy.data; rm(cy.data)
load(paste0("DATA/DATA",tst,"/",gsub("_AAM","",filz)[k]))
cy1 <- cy.data; rm(cy.data)
load(paste0("DATA/DATA",tst,"/",filz[k]))
varz.mis <- names(cy1)[!names(cy1)%in%names(cy.data)]
cy.data$TEMP <- paste0(cy.data$CL_GEOID,"_",cy.data$YEAR)
cy1$TEMP <- paste0(cy1$CL_GEOID,"_",cy1$YEAR)
cy.data <- merge(cy.data,cy1[,c("TEMP",varz.mis)],by="TEMP",all.x=T,all.y=F)
head(cy.data)
tz <- sort(unique(county.years$YEAR))
tz <- sort(unique(county.years$YEAR))
t <- 1
news.sub <- read.csv(paste0("Newspapers2/CountyYear/",tz[t],"_CL_AAM.csv"))
head(news.sub)
news.sub <- aggregate(news.sub[,c("TotalCirculation","AverageProjectedCirculation")],by=list(GEOID=news.sub$GEOID,YEAR=news.sub$YEAR),FUN=function(x){sum(x,na.rm=T)})
head(news.sub)
names(news.sub)[3:4]
names(news.sub)[3:4] <- c("TOTAL_CIRC","AVGPJ_CIRC")
tz <- sort(unique(county.years$YEAR))
cy.list <- lapply(1:length(tz),function(t){
news.sub <- read.csv(paste0("Newspapers2/CountyYear/",tz[t],"_CL_AAM.csv"))
news.sub <- aggregate(news.sub[,c("TotalCirculation","AverageProjectedCirculation")],by=list(GEOID=news.sub$GEOID,YEAR=news.sub$YEAR),FUN=function(x){sum(x,na.rm=T)})
names(news.sub)[3:4] <- c("TOTAL_CIRC","AVGPJ_CIRC")
news.sub
})
cy.mat <- do.call(rbind,cy.list); rm(cy.list)
cy.mat$TEMP <- paste0(cy.mat$GEOID,"_",cy.mat$YEAR)
cy.mat$GEOID <- NULL
cy.mat$YEAR <- NULL
k <- 1
load(paste0("DATA/DATA",tst,"/",gsub("_AAM","",filz)[k]))
cy1 <- cy.data; rm(cy.data)
load(paste0("DATA/DATA",tst,"/",filz[k]))
varz.mis <- names(cy1)[!names(cy1)%in%names(cy.data)]
cy.data$TEMP <- paste0(cy.data$CL_GEOID,"_",cy.data$YEAR)
cy1$TEMP <- paste0(cy1$CL_GEOID,"_",cy1$YEAR)
cy.data <- merge(cy.data,cy1[,c("TEMP",varz.mis)],by="TEMP",all.x=T,all.y=F)
cy.data <- merge(cy.data,cy.mat,by="TEMP",all.x=T,all.y=F)
head(cy.data)
filz
gsub("_AAM","_AAM2",filz[k])
paste0("DATA/DATA",tst,"/",gsub("_AAM","_AAM2",filz[k]))
k <- 1
for(k in 1:length(filz)){print(k)
load(paste0("DATA/DATA",tst,"/",gsub("_AAM","",filz)[k]))
cy1 <- cy.data; rm(cy.data)
load(paste0("DATA/DATA",tst,"/",filz[k]))
varz.mis <- names(cy1)[!names(cy1)%in%names(cy.data)]
cy.data$TEMP <- paste0(cy.data$CL_GEOID,"_",cy.data$YEAR)
cy1$TEMP <- paste0(cy1$CL_GEOID,"_",cy1$YEAR)
cy.data <- merge(cy.data,cy1[,c("TEMP",varz.mis)],by="TEMP",all.x=T,all.y=F)
cy.data <- merge(cy.data,cy.mat,by="TEMP",all.x=T,all.y=F)
save(cy.data,file=paste0("DATA/DATA",tst,"/",gsub("_AAM","_AAM2",filz[k])))
names(cy.data) <- gsub("_COV_t","_C_t",names(cy.data))
write.dta(cy.data,file=paste0("DATA/DATA",tst,"/",gsub("_AAM.RData","_AAM2.dta",filz[k])),convert.factors="labels",convert.dates=F)
rm(cy.data)
}
paste0("DATA/DATA",tst,"/",gsub("_AAM.RData","_AAM2.dta",filz[k]))
library(spdep)
rm(list=ls())
library(maptools,spdep)
rm(list=ls())
library(maptools,spdep)
detachAllPackages <- function() {
basic.packages <- c("package:stats","package:graphics","package:grDevices","package:utils","package:datasets","package:methods","package:base")
package.list <- search()[ifelse(unlist(gregexpr("package:",search()))==1,TRUE,FALSE)]
package.list <- setdiff(package.list,basic.packages)
if (length(package.list)>0)  for (package in package.list) detach(package, character.only=TRUE)
}
detachAllPackages()
library(maptools,spdep)
setwd("~/Dropbox/Stalin/Data")
dir("Admin/")
map <- readShapePoly("Admin/adm6_district_f")
spplot(map)
map <- readShapePoly("Admin/regions2010_sib_10")
spplot(map)
plot(map)
dir("Admin/")
map <- readShapePoly("Admin/RUS_adm3.rds")
load("Admin/RUS_adm3.rds")
load("Admin/RUS_adm3.rData")
source("Admin/RUS_adm3.rds")
source("Admin/RUS_adm2.rds")
map <- readRDS("Admin/RUS_adm2.rds")
plot(map)
dev.off()
dev.off()
plot(map)
q()
load("Dropbox/Stalin/Data/Analysis/Output/IV_ME_200km__POP_v2.RData")
mat
mat[,-3]
mat[mat$p<.05,-3]
list.of.packages <- c("stargazer","gdata","spdep","maptools", "GISTools","geosphere","rgdal","rgeos","AER","spdep","raster","xtable","mvtnorm","stringr","cem","MatchIt")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]; if(length(new.packages)){install.packages(new.packages)}
lapply(list.of.packages, require, character.only = TRUE)
rm(list=ls())
Sys.setlocale('LC_ALL', 'russian')
setwd("F://Dropbox (Personal)/BELARUS")
setwd("~/Dropbox (Personal)/BELARUS")
setwd("~/Dropbox/BELARUS")
source("functions.R")
source("Analysis/functions.R")
list.of.packages <- c("stargazer","gdata","spdep","maptools", "GISTools","geosphere","rgdal","rgeos","AER","spdep","raster","xtable","mvtnorm","stringr","cem","MatchIt","RCurl")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]; if(length(new.packages)){install.packages(new.packages)}
lapply(list.of.packages, require, character.only = TRUE)
rm(list=ls())
Sys.setlocale('LC_ALL', 'russian')
setwd("F://Dropbox (Personal)/BELARUS")
setwd("~/Dropbox (Personal)/BELARUS")
setwd("~/Dropbox/BELARUS")
source("Analysis/functions.R")
rail <- readShapeLines("Shapes/BELARUS_RAIL")
oblasts <- readShapePoly("Shapes/BELARUS_OBLASTS")
load("RailMats/OD_Mats_BY.RData")
load("RailMats/railLines.RData")
load("Shapes/BELARUS_RAYONS_v8.RData")
load("Shapes/BELARUS_VILLAGES_v2.RData")
rail <- crop(as(rail,"SpatialLines"),oblasts)
list.of.packages <- c("stargazer","gdata","spdep","maptools", "GISTools","geosphere","rgdal","rgeos","AER","spdep","raster","xtable","mvtnorm","stringr","cem","MatchIt","RCurl")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]; if(length(new.packages)){install.packages(new.packages)}
lapply(list.of.packages, require, character.only = TRUE)
rm(list=ls())
Sys.setlocale('LC_ALL', 'russian')
setwd("~/Dropbox/BELARUS/WP/ReplicationFiles/")
source("functions.R")
oblasts <- readShapePoly("Shapes/BELARUS_OBLASTS")
rail <- readShapeLines("Shapes/BELARUS_RAIL")
oblasts <- readShapePoly("Shapes/BELARUS_OBLASTS")
load("Shapes/BELARUS_RAYONS_v8.RData")
load("Shapes/BELARUS_VILLAGES_v2.RData")
rail <- crop(as(rail,"SpatialLines"),oblasts)
borders <- readShapePoly("Shapes/April_30_1941")
proj4string(borders) <- CRS("+proj=aea +lat_1=43 +lat_2=62 +lat_0=30 +lon_0=10 +x_0=0 +y_0=0 +ellps=intl +towgs84=-87,-98,-121,0,0,0,0 +units=m +no_defs")
proj4string(map) <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84")
borders <- spTransform(borders,CRS("+proj=longlat +ellps=WGS84 +datum=WGS84"))
borders <- gBuffer(borders, byid=TRUE, width=0)
bb <- bbox(map)
bb[1,1] <- bb[1,1]-(bb[1,2]-bb[1,1])/8
bb[1,2] <- bb[1,2]+(bb[1,2]-bb[1,1])/8
bb[2,1] <- bb[2,1]-(bb[2,2]-bb[2,1])/8
bb[2,2] <- bb[2,2]+(bb[2,2]-bb[2,1])/8
WKTs <- paste("POLYGON((",bb[1,1],bb[2,1],",",bb[1,2],bb[2,1],",",bb[1,2],bb[2,2],",",bb[1,1],bb[2,2],",",bb[1,1],bb[2,1],"))")
test <- readWKT(WKTs)
r <- borders
proj4string(test) <- CRS(proj4string(map))
proj4string(r) <- CRS(proj4string(map))
borders2 <- crop(r, test)
# scale points
pnts.size <- function(x,scale=.2){
ifelse(x<=1,scale*1,
ifelse(x>1&x<=10,scale*2,
ifelse(x>10&x<=50,scale*3,
ifelse(x>10&x<=100,scale*4,
ifelse(x>100&x<=500,scale*5,scale*6)))))}
cexs <- pnts.size(sp.villages$HOUSES_DESTROYED,scale=.15)
par(mar=c(0,0,0,0))
plot(test,col="lightblue",border=NA)
plot(borders2,add=T,col="beige",border="grey")
plot(map,add=T,col="beige")
scl <- map$CONVOYS_PROP
#plot(map,col=rgb(1-scl,1-scl,1-scl,.5),add=T,border="lightgray",lwd=.1)
plot(map,col=rgb(scl/2,scl/3,scl,.75),add=T,border="lightgray",lwd=.1)
lines(rail,lty=1,col="black",lwd=3)
lines(rail,lty=1,col="yellow",lwd=2)
lls <- c(bbox(map)[1,1]-2.5,mean(c(bbox(map)[2,1]+1.2,1.1*(bbox(map)[2,2]-.7))))
text(x=lls[1]+2.2,y=lls[2]-.1,labels=c("Partisan violence:"),cex=.7,pos=4)
legend(x=lls[1]+2.3,y=lls[2]-.1,fill=c(rgb(.5,.33,1,.75),rgb(0,0,0,.75),rgb(.25,.167,.5,.75)),legend=c("100% interdiction","100% coercion","50%-50%"),bty="n",cex=.65,ncol=3)
text(bb[1,1]-.1,bb[2,1]+.5,labels="General Government\n(Poland)",pos=4,cex=.5,col="grey")
text(bb[1,1]+4,bb[2,1]+.8,labels="Soviet Union",pos=4,cex=.5,col="grey")
text(bb[1,1]-.1,bb[2,2]-3,labels="Germany",pos=4,cex=.5,col="grey")
legend(x=bb[1,2]-3.5,y=bb[2,2]+.1,lwd=3,col="black",legend="Railroad",bty="n",cex=.7)
legend(x=bb[1,2]-3.5,y=bb[2,2]+.1,lwd=1,col="yellow",legend="Railroad",bty="n",cex=.7)
load("Data/BELARUS_RAYONS_MONTHS_Full.RData")
mat <- rayons.panel
ts.vec <- aggregate(mat[,c("CONVOYS","COERCION","HOUSES_DESTROYED","PEOPLE_KILLED","REPRISAL")],by=list(YRMO=mat$YRMO),FUN=function(x){sum(x,na.rm=T)})
ts.vec$TID <- 1:nrow(ts.vec)
load("Data/BELARUS_RAYONS_MONTHS_Full.RData")
mat <- rayons.panel
varz <- c("PEOPLE_KILLED","HOUSES_DESTROYED","REPRISAL")
crosstab2 <- lapply(1:3,function(v){
vard <- paste0(varz[v],"_p1")
vard_t <- paste0(varz[v],"_t1")
# Post
y00 <- sum(mat[,vard][mat$CONVOYS==0&mat$COERCION==0],na.rm=T)/sum(mat[,vard],na.rm=T)
y00p <- mean(mat[,vard][mat$CONVOYS==0&mat$COERCION==0],na.rm=T)
y10 <- sum(mat[,vard][mat$CONVOYS>0&mat$COERCION==0],na.rm=T)/sum(mat[,vard],na.rm=T)
y10p <- mean(mat[,vard][mat$CONVOYS>0&mat$COERCION==0],na.rm=T)
y01 <- sum(mat[,vard][mat$CONVOYS==0&mat$COERCION>0],na.rm=T)/sum(mat[,vard],na.rm=T)
y01p <- mean(mat[,vard][mat$CONVOYS==0&mat$COERCION>0],na.rm=T)
y11 <- sum(mat[,vard][mat$CONVOYS>0&mat$COERCION>0],na.rm=T)/sum(mat[,vard],na.rm=T)
y11p <- mean(mat[,vard][mat$CONVOYS>0&mat$COERCION>0],na.rm=T)
# Pre
y00_t <- sum(mat[,vard_t][mat$CONVOYS==0&mat$COERCION==0],na.rm=T)/sum(mat[,vard_t],na.rm=T)
y00p_t <- mean(mat[,vard_t][mat$CONVOYS==0&mat$COERCION==0],na.rm=T)
y10_t <- sum(mat[,vard_t][mat$CONVOYS>0&mat$COERCION==0],na.rm=T)/sum(mat[,vard_t],na.rm=T)
y10p_t <- mean(mat[,vard_t][mat$CONVOYS>0&mat$COERCION==0],na.rm=T)
y01_t <- sum(mat[,vard_t][mat$CONVOYS==0&mat$COERCION>0],na.rm=T)/sum(mat[,vard_t],na.rm=T)
y01p_t <- mean(mat[,vard_t][mat$CONVOYS==0&mat$COERCION>0],na.rm=T)
y11_t <- sum(mat[,vard_t][mat$CONVOYS>0&mat$COERCION>0],na.rm=T)/sum(mat[,vard_t],na.rm=T)
y11p_t <- mean(mat[,vard_t][mat$CONVOYS>0&mat$COERCION>0],na.rm=T)
# Diff in diff
mat$DIFF_TEMP <- mat[,vard]-mat[,vard_t]
vardd <- "DIFF_TEMP"
y00d <- mean(mat[,vardd][mat$CONVOYS==0&mat$COERCION==0],na.rm=T)
y10d <- mean(mat[,vardd][mat$CONVOYS>0&mat$COERCION==0],na.rm=T)
y01d <- mean(mat[,vardd][mat$CONVOYS==0&mat$COERCION>0],na.rm=T)
y11d <- mean(mat[,vardd][mat$CONVOYS>0&mat$COERCION>0],na.rm=T)
# cross-tab
crosstab <- data.frame(
Interdiction0=c(
paste0(round(y00p,2)),
paste0(round(y00p_t,2)),
paste0("(",ifelse(y00d>=0,"+",""),round(y00d,2),")"),
paste0(round(y01p,2)),
paste0(round(y01p_t,2)),
paste0("(",ifelse(y01d>=0,"+",""),round(y01d,2),")")
),
Interdiction1=c(
paste0(round(y10p,2)),
paste0(round(y10p_t,2)),
paste0("(",ifelse(y10d>=0,"+",""),round(y10d,2),")"),
paste0(round(y11p,2)),
paste0(round(y11p_t,2)),
paste0("(",ifelse(y11d>=0,"+",""),round(y11d,2),")")
))
names(crosstab) <- paste0(varz[v],"_",0:1)
crosstab
})
crosstab2 <- do.call(cbind,crosstab2)
xtable(crosstab2)
# Village-level
load("Data/BELARUS_VILLAGES_MONTHS_Full.RData")
head(villages.panel)
temp <- data.frame(VID=villages.panel$VID,LONG=villages.panel$LONG,LAT=villages.panel$LAT,DIST2BERLIN=NA)
temp <- temp[!duplicated(temp$VID),]
berlin <- c(13.383333,52.516667)
coords <- temp[,c("LONG","LAT")]
for(i in 1:nrow(coords)){temp[i,"DIST2BERLIN"] <- dist(rbind(berlin,coords[i,]))}
temp$LONG <- temp$LAT <- NULL
X <- villages.panel
X <- merge(X,temp,by="VID",all.x=T,all.y=F)
deg2km <- function(x){x*111.1949}
X$DIST2BERLIN <- deg2km(X$DIST2BERLIN)
varz <- c("PEOPLE_KILLED","HOUSES_DESTROYED","REPRISAL","CONVOYS","COERCION","CONVOYS_PROP","CENT_BTWN_R","DIST_RAIL","URBAN100","OPEN_TERRAIN","ETH_JUD","CONTROL_AREA","DISTLIB_MIN","PEOPLE_PRE","HOUSES_PRE","DIST2BERLIN")
labz <- c("People killed","Houses destroyed","Number of reprisals","Partisan interdiction","Partisan coercion","Partisan interdiction (proportion)","Rail network centrality","Distance to rail station","Urbanization","Open terrain","Percent Jewish","Partisan control","Distance from frontline","People (pre-war)","Houses (pre-war)","Distance to Berlin")
# Rayon-level
load("Data/BELARUS_RAYONS_MONTHS_Full.RData")
load("Data/BELARUS_RAYONS_Full_sp.RData")
map_crd <- data.frame(RAYON_ID=map$RAYON_ID,LONG=coordinates(map)[,1],LAT=coordinates(map)[,2])
rayons.panel <- merge(rayons.panel,map_crd,by="RAYON_ID")
temp <- data.frame(RAYON_ID=map$RAYON_ID,DIST2BERLIN=NA)
berlin <- c(13.383333,52.516667)
coords <- coordinates(map)
for(i in 1:nrow(coords)){temp[i,"DIST2BERLIN"] <- dist(rbind(berlin,coords[i,]))}
deg2km <- function(x){x*111.1949}
X <- rayons.panel
X <- merge(X,temp,by="RAYON_ID",all.x=T,all.y=F)
X$DIST2BERLIN <- deg2km(X$DIST2BERLIN)
varz <- c("PEOPLE_KILLED","HOUSES_DESTROYED","REPRISAL","CONVOYS","COERCION","CONVOYS_PROP","CENT_BTWN_R","DIST_RAIL","URBAN100","OPEN_TERRAIN","ETH_JUD","CONTROL_AREA","DISTLIB_MIN","PEOPLE_PRE","HOUSES_PRE","DIST2BERLIN")
labz <- c("People killed","Houses destroyed","Number of reprisals","Partisan interdiction","Partisan coercion","Partisan interdiction (proportion)","Rail network centrality","Distance to rail station","Urbanization","Open terrain","Percent Jewish","Partisan control","Distance from frontline","People (pre-war)","Houses (pre-war)","Distance to Berlin")
# load data
load("Data/BELARUS_RAYONS_MONTHS_Full.RData")
# no. of rayons
length(unique(rayons.panel$RAYON_ID))
# no. of time periods
length(unique(rayons.panel$YRMO))
# date range
range(rayons.panel$YRMO)
# N
dim(rayons.panel)
# spatial data
load("Data/BELARUS_RAYONS_Full_sp.RData")
map_crd <- data.frame(RAYON_ID=map$RAYON_ID,LONG=coordinates(map)[,1],LAT=coordinates(map)[,2])
rayons.panel <- merge(rayons.panel,map_crd,by="RAYON_ID")
mod <- gam(HOUSES_DESTROYED_p1~as.factor(YRMO)+CONVOYS + COERCION +CENT_BTWN_R+DIST_RAIL+URBAN100+OPEN_TERRAIN+ETH_JUD+CONTROL_AREA+DISTLIB_MIN+HOUSES_PRE+HOUSES_DESTROYED+s(LONG,LAT),data=rayons.panel,family="quasipoisson");summary(mod);#qaic(mod)
mod3 <- mod
mod <- gam(HOUSES_DESTROYED_p1~as.factor(YRMO)+CONVOYS_PROP+CENT_BTWN_R+DIST_RAIL+URBAN100+OPEN_TERRAIN+ETH_JUD+CONTROL_AREA+DISTLIB_MIN+HOUSES_PRE +HOUSES_DESTROYED+s(LONG,LAT),data=rayons.panel,family="quasipoisson");summary(mod);#qaic(mod)
mod4 <- mod
list.of.packages <- c("stargazer","gdata","spdep","maptools", "GISTools","geosphere","rgdal","rgeos","AER","spdep","raster","xtable","mvtnorm","stringr","cem","MatchIt","RCurl","mcgv")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]; if(length(new.packages)){install.packages(new.packages)}
lapply(list.of.packages, require, character.only = TRUE)
list.of.packages <- c("stargazer","gdata","spdep","maptools", "GISTools","geosphere","rgdal","rgeos","AER","spdep","raster","xtable","mvtnorm","stringr","cem","MatchIt","RCurl","mgcv")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]; if(length(new.packages)){install.packages(new.packages)}
lapply(list.of.packages, require, character.only = TRUE)
# load data
load("Data/BELARUS_RAYONS_MONTHS_Full.RData")
# no. of rayons
length(unique(rayons.panel$RAYON_ID))
# no. of time periods
length(unique(rayons.panel$YRMO))
# date range
range(rayons.panel$YRMO)
# N
dim(rayons.panel)
# spatial data
load("Data/BELARUS_RAYONS_Full_sp.RData")
map_crd <- data.frame(RAYON_ID=map$RAYON_ID,LONG=coordinates(map)[,1],LAT=coordinates(map)[,2])
rayons.panel <- merge(rayons.panel,map_crd,by="RAYON_ID")
mod <- gam(HOUSES_DESTROYED_p1~as.factor(YRMO)+CONVOYS + COERCION +CENT_BTWN_R+DIST_RAIL+URBAN100+OPEN_TERRAIN+ETH_JUD+CONTROL_AREA+DISTLIB_MIN+HOUSES_PRE+HOUSES_DESTROYED+s(LONG,LAT),data=rayons.panel,family="quasipoisson");summary(mod);#qaic(mod)
mod3 <- mod
mod <- gam(HOUSES_DESTROYED_p1~as.factor(YRMO)+CONVOYS_PROP+CENT_BTWN_R+DIST_RAIL+URBAN100+OPEN_TERRAIN+ETH_JUD+CONTROL_AREA+DISTLIB_MIN+HOUSES_PRE +HOUSES_DESTROYED+s(LONG,LAT),data=rayons.panel,family="quasipoisson");summary(mod);#qaic(mod)
mod4 <- mod
mod1$model
mod <- gam(PEOPLE_KILLED_p1~as.factor(YRMO)+CONVOYS + COERCION+CENT_BTWN_R+DIST_RAIL+URBAN100+OPEN_TERRAIN+ETH_JUD+CONTROL_AREA+DISTLIB_MIN +PEOPLE_PRE+PEOPLE_KILLED+s(LONG,LAT),data=rayons.panel,family="quasipoisson");summary(mod);#qaic(mod)
